1 package org.apache.solr.handler.clustering;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import java.io.File;
20 import java.util.Map;
21
22 import org.apache.commons.io.FileUtils;
23 import org.apache.solr.SolrTestCaseJ4;
24 import org.apache.solr.common.SolrInputDocument;
25 import org.junit.BeforeClass;
26
27
28
29
30
31 public abstract class AbstractClusteringTestCase extends SolrTestCaseJ4 {
32 protected static int numberOfDocs = 0;
33
34 @BeforeClass
35 public static void beforeClass() throws Exception {
36 File testHome = createTempDir().toFile();
37 FileUtils.copyDirectory(getFile("clustering/solr"), testHome);
38 initCore("solrconfig.xml", "schema.xml", testHome.getAbsolutePath());
39 numberOfDocs = 0;
40 for (String[] doc : DOCUMENTS) {
41 assertNull(h.validateUpdate(adoc("id", Integer.toString(numberOfDocs), "url", doc[0], "title", doc[1], "snippet", doc[2])));
42 numberOfDocs++;
43 }
44
45
46 final SolrInputDocument multiValuedSnippet = new SolrInputDocument();
47 multiValuedSnippet.addField("id", numberOfDocs++);
48 multiValuedSnippet.addField("title", "Title");
49 multiValuedSnippet.addField("url", "URL");
50 multiValuedSnippet.addField("snippet", "First value of multi field. Some more text. And still more.");
51 multiValuedSnippet.addField("snippet", "Second value of multi field. Some more text. And still more.");
52 multiValuedSnippet.addField("snippet", "Third value of multi field. Some more text. And still more.");
53 assertNull(h.validateUpdate(adoc(multiValuedSnippet)));
54
55
56 final SolrInputDocument multiFieldDoc = new SolrInputDocument();
57 multiFieldDoc.addField("id", numberOfDocs++);
58 multiFieldDoc.addField("title", "Title field");
59 multiFieldDoc.addField("heading", "Heading field");
60 multiFieldDoc.addField("url", "URL");
61 multiFieldDoc.addField("snippet", "Snippet field: this is the contents of the snippet field.");
62 multiFieldDoc.addField("body", "Body field: this is the contents of the body field that will get clustered together with snippet.");
63 assertNull(h.validateUpdate(adoc(multiFieldDoc)));
64
65
66 final SolrInputDocument docWithOneSupprtedLanguage = new SolrInputDocument();
67 docWithOneSupprtedLanguage.addField("id", numberOfDocs++);
68 docWithOneSupprtedLanguage.addField("title", "");
69 docWithOneSupprtedLanguage.addField("url", "one_supported_language");
70 docWithOneSupprtedLanguage.addField("lang", "zh-cn");
71 assertNull(h.validateUpdate(adoc(docWithOneSupprtedLanguage)));
72
73
74 final SolrInputDocument docWithOneSupprtedLanguageOfMany = new SolrInputDocument();
75 docWithOneSupprtedLanguageOfMany.addField("id", numberOfDocs++);
76 docWithOneSupprtedLanguageOfMany.addField("url", "one_supported_language_of_many");
77 docWithOneSupprtedLanguageOfMany.addField("lang", "zh-tw");
78 docWithOneSupprtedLanguageOfMany.addField("lang", "POLISH");
79 docWithOneSupprtedLanguageOfMany.addField("lang", "de");
80 assertNull(h.validateUpdate(adoc(docWithOneSupprtedLanguageOfMany)));
81
82
83 final SolrInputDocument docWithCustomFields = new SolrInputDocument();
84 docWithCustomFields.addField("id", numberOfDocs++);
85 docWithCustomFields.addField("url", "custom_fields");
86 docWithCustomFields.addField("intfield_i", 10);
87 docWithCustomFields.addField("floatfield_f", 10.5);
88 docWithCustomFields.addField("heading", "first");
89 docWithCustomFields.addField("heading", "second");
90 assertNull(h.validateUpdate(adoc(docWithCustomFields)));
91 assertNull(h.validateUpdate(commit()));
92 }
93
94
95
96
97 protected final Map<String,SearchClusteringEngine> getSearchClusteringEngines(ClusteringComponent comp) {
98 return comp.getSearchClusteringEngines();
99 }
100
101 final static String[][] DOCUMENTS = new String[][]{
102 {"http://en.wikipedia.org/wiki/Data_mining",
103 "Data Mining - Wikipedia",
104 "Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data for patterns."},
105
106
107 {"http://en.wikipedia.org/wiki/Datamining",
108 "Data mining - Wikipedia, the free encyclopedia",
109 "Data mining is the entire process of applying computer-based methodology, ... Moreover, some data-mining systems such as neural networks are inherently geared ..."},
110
111
112 {"http://www.statsoft.com/textbook/stdatmin.html",
113 "Electronic Statistics Textbook: Data Mining Techniques",
114 "Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of computational and graphical exploratory data analysis techniques."},
115
116
117 {"http://www.thearling.com/text/dmwhite/dmwhite.htm",
118 "An Introduction to Data Mining",
119 "Data mining, the extraction of hidden predictive information from large ... Data mining tools predict future trends and behaviors, allowing businesses to ..."},
120
121
122 {"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
123 "Data Mining: What is Data Mining?",
124 "Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into useful information, can do and how it works."},
125
126
127 {"http://www.spss.com/datamine",
128 "Data Mining Software, Data Mining Applications and Data Mining Solutions",
129 "The patterns uncovered using data mining help organizations make better and ... data mining customer ... Data mining applications, on the other hand, embed ..."},
130
131
132 {"http://www.kdnuggets.com/",
133 "KD Nuggets",
134 "Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, text mining, and web mining software, courses, jobs, publications, and meetings."},
135
136
137 {"http://www.answers.com/topic/data-mining",
138 "data mining: Definition from Answers.com",
139 "data mining n. The automatic extraction of useful, often previously unknown information from large databases or data ... Data Mining For Investing ..."},
140
141
142 {"http://www.statsoft.com/products/dataminer.htm",
143 "STATISTICA Data Mining and Predictive Modeling Solutions",
144 "GRC site-wide menuing system research and development. ... Contact a Data Mining Solutions Consultant. News and Success Stories. Events ..."},
145
146
147 {"http://datamining.typepad.com/",
148 "Data Mining: Text Mining, Visualization and Social Media",
149 "Commentary on text mining, data mining, social media and data visualization. ... While mining Twitter data for business and marketing intelligence (trend/buzz ..."},
150
151
152 {"http://www.twocrows.com/",
153 "Two Crows Corporation",
154 "Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining accessible and easy to use."},
155
156
157 {"http://www.thearling.com/",
158 "Thearling.com",
159 "Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden predictive information from databases, and other analytic technologies."},
160
161
162 {"http://www.ccsu.edu/datamining/",
163 "CCSU - Data Mining",
164 "Offers degrees and certificates in data mining. Allows students to explore cutting-edge data mining techniques and applications: market basket analysis, decision trees, neural networks, machine learning, web mining, and data modeling."},
165
166
167 {"http://www.oracle.com/technology/products/bi/odm",
168 "Oracle Data Mining",
169 "Oracle Data Mining Product Center ... New Oracle Data Mining Powers New Social CRM Application (more information ... Mining High-Dimensional Data for ..."},
170
171
172 {"http://databases.about.com/od/datamining/a/datamining.htm",
173 "Data Mining: An Introduction",
174 "About.com article on how businesses are discovering new trends and patterns of behavior that previously went unnoticed through data mining, automated statistical analysis techniques."},
175
176
177 {"http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
178 "Open Directory - Computers: Software: Databases: Data Mining",
179 "Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... Data mining creates information assets that an organization can leverage to ..."},
180
181
182 {"http://www.cs.wisc.edu/dmi/",
183 "DMI:Data Mining Institute",
184 "Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data Mining Group of Microsoft ..."},
185
186
187 {"http://www.the-data-mine.com/",
188 "The Data Mine",
189 "Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge discovery. List software, events, organizations, and people working in data mining."},
190
191
192 {"http://www.statserv.com/datamining.html",
193 "St@tServ - About Data Mining",
194 "St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data Mining Resources, ..."},
195
196
197 {"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
198 "MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining ...",
199 "Introduces students to a class of methods known as data mining that assists managers in recognizing patterns and making intelligent use of massive amounts of ..."},
200
201
202 {"http://www.pentaho.com/products/data_mining/",
203 "Pentaho Commercial Open Source Business Intelligence: Data Mining",
204 "For example, data mining can warn you there's a high probability a specific ... Pentaho Data Mining is differentiated by its open, standards-compliant nature, ..."},
205
206
207 {"http://www.investorhome.com/mining.htm",
208 "Investor Home - Data Mining",
209 "Data Mining or Data Snooping is the practice of searching for relationships and ... Data mining involves searching through databases for correlations and patterns ..."},
210
211
212 {"http://www.datamining.com/",
213 "Predictive Modeling and Predictive Analytics Solutions | Enterprise ...",
214 "Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics."},
215
216
217 {"http://www.sourcewatch.org/index.php?title=Data_mining",
218 "Data mining - SourceWatch",
219 "These agencies reported 199 data mining projects, of which 68 ... Office, \"DATA MINING. ... powerful technology known as data mining -- and how, in the ..."},
220
221
222 {"http://www.autonlab.org/tutorials/",
223 "Statistical Data Mining Tutorials",
224 "Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, the foundations of statistical data analysis, and most of the classic machine learning and data mining algorithms."},
225
226
227 {"http://www.microstrategy.com/data-mining/index.asp",
228 "Data Mining",
229 "With MicroStrategy, data mining scoring is fully integrated into mainstream ... The integration of data mining models from other applications is accomplished by ..."},
230
231
232 {"http://www.datamininglab.com/",
233 "Elder Research",
234 "Provides consulting and short courses in data mining and pattern discovery patterns in data."},
235
236
237 {"http://www.sqlserverdatamining.com/",
238 "SQL Server Data Mining > Home",
239 "SQL Server Data Mining Portal ... Data Mining as an Application Platform (Whitepaper) Creating a Web Cross-sell Application with SQL Server 2005 Data Mining (Article) ..."},
240
241
242 {"http://databases.about.com/cs/datamining/g/dmining.htm",
243 "Data Mining",
244 "What is data mining? Find out here! ... Book Review: Data Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does it Have to Do with ..."},
245
246
247 {"http://www.sas.com/technologies/analytics/datamining/index.html",
248 "Data Mining Software and Text Mining | SAS",
249 "... raw data to smarter ... Data Mining is an iterative process of creating ... The knowledge gleaned from data and text mining can be used to fuel ..."}
250 };
251 }